{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "支持非对称路径的pathsim（参见Hete-CF: Social-Based Collaborative Filtering Recommendation using Heterogeneous Relations）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from scipy.sparse import coo_matrix\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from tqdm import tqdm\n",
    "\n",
    "\n",
    "\n",
    "# 数据处理示例\n",
    "data_path=\"../data/ml-100k/u.data\"\n",
    "data_fields = ['user_id', 'item_id', 'rating', 'timestamp']\n",
    "data_df = pd.read_table(data_path, names=data_fields)\n",
    "\n",
    "# get user number\n",
    "n_users = max(data_df['user_id'].values)\n",
    "# get item number\n",
    "n_items = max(data_df['item_id'].values)\n",
    "\n",
    "\n",
    "data = np.ones((data_df.shape[0]))\n",
    "data=data_df.rating.values\n",
    "row = data_df.user_id-1\n",
    "col = data_df.item_id-1\n",
    "\n",
    "# 计数矩阵 pathCount\n",
    "UI = coo_matrix((data, (row, col)), shape=(n_users, n_items))\n",
    "UIU=UI.dot(UI.transpose())\n",
    "IUI=UI.transpose().dot(UI)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-01-21T07:41:54.339215Z",
     "start_time": "2020-01-21T07:41:54.328215Z"
    }
   },
   "outputs": [],
   "source": [
    "K=10 # 取相似度最大的top10对象\n",
    "def calSim(path, M, hinSim, hinSimI):\n",
    "    hinSim[path]=np.empty((M.shape[0],K))\n",
    "    hinSimI[path]=np.empty((M.shape[0],K),dtype=np.int)\n",
    "    M=M.tocsc()\n",
    "    col_sum={}\n",
    "    for i in tqdm(range(M.shape[1])):\n",
    "        col_sum[i]=M.getcol(i).toarray().sum()\n",
    "    M=M.tocsr()\n",
    "    row_sum={}\n",
    "    for i in tqdm(range(M.shape[0])):\n",
    "        row_sum[i]=M.getrow(i).toarray().sum()\n",
    "    for i in tqdm(range(M.shape[0])):\n",
    "        M_i=M.getrow(i).toarray()\n",
    "        sim=[]\n",
    "        for j in range(M_i.shape[1]):\n",
    "            M_ij=M_i[0][j]\n",
    "            M_i_=row_sum[i] \n",
    "            M_j_=col_sum[j]\n",
    "\n",
    "            S_ij=2*M_ij/(M_i_+M_j_)\n",
    "            sim.append(S_ij)\n",
    "        sim=np.array(sim)\n",
    "        ids=np.argsort(-sim)\n",
    "        hinSimI[path][i]=ids[:K]\n",
    "        hinSim[path][i]=sim[ids[:K]]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-01-21T07:42:12.708542Z",
     "start_time": "2020-01-21T07:42:04.103556Z"
    }
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|████████████████████████████████████████████████████████████████████████████| 1682/1682 [00:00<00:00, 8390.51it/s]\n",
      "100%|███████████████████████████████████████████████████████████████████████████| 1682/1682 [00:00<00:00, 19385.05it/s]\n",
      "100%|█████████████████████████████████████████████████████████████████████████████| 1682/1682 [00:02<00:00, 566.70it/s]\n",
      "100%|████████████████████████████████████████████████████████████████████████████| 1682/1682 [00:00<00:00, 8389.27it/s]\n",
      "100%|███████████████████████████████████████████████████████████████████████████| 1682/1682 [00:00<00:00, 19164.82it/s]\n",
      "100%|█████████████████████████████████████████████████████████████████████████████| 1682/1682 [00:02<00:00, 562.92it/s]\n",
      "100%|████████████████████████████████████████████████████████████████████████████| 1682/1682 [00:00<00:00, 9474.71it/s]\n",
      "100%|█████████████████████████████████████████████████████████████████████████████| 943/943 [00:00<00:00, 20555.29it/s]\n",
      "100%|███████████████████████████████████████████████████████████████████████████████| 943/943 [00:01<00:00, 622.45it/s]\n"
     ]
    }
   ],
   "source": [
    "heteSim={}\n",
    "heteSimI={}\n",
    "\n",
    "\n",
    "calSim('UIU',IUI,heteSim, heteSimI)\n",
    "calSim('IUI',IUI,heteSim, heteSimI)\n",
    "calSim('UI',UI,heteSim, heteSimI)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-01-21T07:42:30.336216Z",
     "start_time": "2020-01-21T07:42:30.332227Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0.00789809, 0.00610422, 0.00554021, ..., 0.00479463, 0.00469971,\n",
       "        0.00466698],\n",
       "       [0.0041556 , 0.00283757, 0.00274911, ..., 0.00256703, 0.00255534,\n",
       "        0.00254744],\n",
       "       [0.00460345, 0.00230217, 0.00221368, ..., 0.00192756, 0.00189306,\n",
       "        0.001879  ],\n",
       "       ...,\n",
       "       [0.00718563, 0.00598802, 0.00399202, ..., 0.00242388, 0.00208578,\n",
       "        0.00190331],\n",
       "       [0.00278035, 0.00198577, 0.00188828, ..., 0.00059067, 0.00051078,\n",
       "        0.0005034 ],\n",
       "       [0.00281162, 0.0013132 , 0.0009564 , ..., 0.00040182, 0.00036791,\n",
       "        0.00035051]])"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "heteSim['UIU']"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
